22 

APPENDIX A 

The following is a short, Perl-like pseudocode implementation of an abbreviated 
intermediary server and several finite state machine implementations. The pseudocode is 
commented, and is straightforwardly interpreted by anyone skilled in the art of software 
development. 

#!/usr/bin/perl -w

#
# Session Server entry point.
#
# Hands control to start_session_server; if that call ever returns,
# the script terminates through die.
#

&start_session_server();
die;



20 

#
# start_session_server()
#
# Loads the server configuration, resets the FSM tables and hands
# references to them to load_FSMs, then starts the server on port 8080.
#
# %FSM_HASH and %ARG_HASH are package globals; the request dispatcher and
# the process_FSM_* handlers in this file read them.
#
sub start_session_server
{
    # load configurations
    &load_config_file( "config.txt" );

    # load retrieval keys and FSMs
    # Start from empty tables: "%hash = 0" assigns a one-element list,
    # which creates a bogus key "0" and warns under -w.
    %FSM_HASH = ();
    %ARG_HASH = ();
    &load_FSMs( "FSM_config.txt.pl", \%FSM_HASH, \%ARG_HASH );

    # start the server part of Session Server
    &start_server( 8080 );
}
#
# start_server( $port )
#
# Initializes the server on $port, then keeps handling client requests
# for as long as the package global $run stays true.
#
sub start_server
{
    my( $port ) = @_;

    &initialize_and_start_server( $port );

    # The listing looped on "$run = true": an assignment of a bareword,
    # which is always true.  Set the flag once and test it instead, so
    # clearing $run elsewhere can end the loop.
    $run = 1;
    while( $run ) {

        # start a new thread
        # NOTE(review): $url is never assigned in this sub; presumably it
        # comes from the accepted client connection -- confirm.
        &listen_and_process_client_request( $url );

    }

    return;
}



#
# listen_and_process_client_request( $url )
#
# Extracts the retrieval key from $url, looks up the name of the FSM
# registered for that key in %FSM_HASH, and dispatches to the matching
# process_FSM_* handler.  An unrecognized FSM name is ignored.
#
sub listen_and_process_client_request
{
    my( $url ) = @_;

    my( $rkey ) = &get_retrieval_key_from_url( $url );
    my( $fsm )  = &get_corresponding_FSM_name( $rkey, \%FSM_HASH );

    # "for" aliases $_ to $fsm for the block only, so the caller's $_ is
    # left alone.  The patterns are anchored at both ends so that
    # FSM_session_id does not also match FSM_session_id_optimized.
    SWITCH: for ( $fsm ) {

        if( /^FSM_session_id$/ ) { &process_FSM_session_id( $rkey ); last SWITCH; }

        if( /^FSM_session_id_optimized$/ ) { &process_FSM_session_id_optimized( $rkey ); last SWITCH; }

        if( /^FSM_HTML_FORM$/ ) { &process_FSM_HTML_FORM( $rkey ); last SWITCH; }

        # other FSM's can be added here...

    }

    # exit this thread

}
#
# process_FSM_session_id( $rkey )
#
# Replays a chain of page fetches described by the tab-separated argument
# string stored under $rkey in %ARG_HASH.  The first field is the start
# URL; every following field is a regular expression whose first capture
# group, matched against the current page, is the next URL to fetch.
#
# On success prints a <BASE HREF> tag for the start URL followed by the
# last page fetched, and returns nothing.  If a pattern does not match,
# returns an error string naming the failing step and prints nothing.
#
sub process_FSM_session_id
{
    my( $rkey ) = @_;

    my( @ARG_ARR ) = split( /\t/, $ARG_HASH{ $rkey } );

    # Fetch one URL with wget and return the page text ('' on failure).
    # The URL is passed as a separate argument (list-form pipe open), so
    # it never reaches a shell: URLs scraped from remote pages are
    # untrusted.  "-nv" is the short form of the listing's --non-verbose;
    # the long spelling differs between wget releases.
    my $fetch = sub {
        my( $url ) = @_;
        open( my $wget, '-|', 'wget', '-O', '-',
              '--load-cookies', 'cookies', '--save-cookies', 'cookies',
              '-nv', $url ) or return '';
        local $/;
        my $page = <$wget>;
        close( $wget );
        return defined $page ? $page : '';
    };

    #
    # FSM -- first step
    #

    my $starturl = shift( @ARG_ARR );
    my $doc = $fetch->( $starturl );

    #
    # FSM -- step i
    #

    my( $cnt ) = 1;
    foreach my $regexp ( @ARG_ARR ){

        # $regexp = "<a[^>]+?href=([^> \t\r\n]*)[^>]*>[^<]*<img[^>]+alt=\"computers and peripherals";

        if( $doc =~ /$regexp/si ){
            my $nexturl = $1;

            # strip the quotes that may surround the href value
            $nexturl =~ s/^[\"\']*//;
            $nexturl =~ s/[\"\']*$//;

            $doc = $fetch->( $nexturl );
        }else{
            return "Nexturl at FSM Step $cnt -- cannot be obtained... \n";
        }

        $cnt ++;
    }

    #
    # FSM -- last step
    #

    my $base_href = "<BASE HREF=\"$starturl\">";
    print $base_href, "\n", $doc, "\n";

    return;
}
#
# process_FSM_session_id_optimized( $rkey )
#
# Shortcut variant of process_FSM_session_id: fetches the start page only
# to obtain a session ID, substitutes that ID into a stored final URL and
# fetches the final page directly.
#
# The tab-separated argument string under $rkey in %ARG_HASH is consumed
# in this order: start URL, session-ID pattern (first capture group is the
# ID), final URL, placeholder pattern to replace with the ID.
#
# On success prints a <BASE HREF> tag for the start URL followed by the
# final page, and returns nothing.  Returns an error string if the session
# ID pattern does not match.
#
sub process_FSM_session_id_optimized
{
    my( $rkey ) = @_;

    my( @ARG_ARR ) = split( /\t/, $ARG_HASH{ $rkey } );

    # Fetch one URL with wget and return the page text ('' on failure).
    # The URL is passed as a separate argument (list-form pipe open), so
    # it never reaches a shell.  "-nv" is the short form of the listing's
    # --non-verbose; the long spelling differs between wget releases.
    my $fetch = sub {
        my( $url ) = @_;
        open( my $wget, '-|', 'wget', '-O', '-',
              '--load-cookies', 'cookies', '--save-cookies', 'cookies',
              '-nv', $url ) or return '';
        local $/;
        my $page = <$wget>;
        close( $wget );
        return defined $page ? $page : '';
    };

    #
    # FSM -- step 0
    #

    my $starturl = shift( @ARG_ARR );
    my $doc = $fetch->( $starturl );

    #
    # FSM -- step 1
    #

    my $regexp = shift( @ARG_ARR );
    my $session_ID;
    if( $doc =~ /$regexp/si ){
        $session_ID = $1;
    }else{
        # the listing reported "Step 2" here, but the failure is in step 1
        return "Session ID at FSM Step 1 -- cannot be obtained... \n";
    }

    #
    # FSM -- step 2
    #

    my $finalurl = shift( @ARG_ARR );
    $regexp = shift( @ARG_ARR );

    $finalurl =~ s/$regexp/$session_ID/gs;   # substitute new session ID into the final URL

    $doc = $fetch->( $finalurl );

    my $base_href = "<BASE HREF=\"$starturl\">";
    print $base_href, "\n", $doc, "\n";

    return;
}
#
# process_FSM_HTML_FORM( $rkey )
#
# Fetches the form page whose URL is the first tab-separated field stored
# under $rkey in %ARG_HASH, then applies each remaining field to the page
# as a (pattern, replacement) pair so the form arrives pre-filled.
#
# Prints a <BASE HREF> tag for the form URL followed by the edited page,
# and returns nothing.
#
sub process_FSM_HTML_FORM
{
    my( $rkey ) = @_;

    my( @ARG_ARR ) = split( /\t/, $ARG_HASH{ $rkey } );

    # Fetch one URL with wget and return the page text ('' on failure).
    # The URL is passed as a separate argument (list-form pipe open), so
    # it never reaches a shell.  "-nv" is the short form of the listing's
    # --non-verbose; the long spelling differs between wget releases.
    my $fetch = sub {
        my( $url ) = @_;
        open( my $wget, '-|', 'wget', '-O', '-',
              '--load-cookies', 'cookies', '--save-cookies', 'cookies',
              '-nv', $url ) or return '';
        local $/;
        my $page = <$wget>;
        close( $wget );
        return defined $page ? $page : '';
    };

    #
    # FSM -- first step
    #

    my $form_url = shift( @ARG_ARR );
    my $doc = $fetch->( $form_url );

    #
    # FSM -- step i
    #

    foreach my $field_value ( @ARG_ARR ){

        # NOTE(review): the separator pattern is illegible in the listing
        # ("A/").  /^/ is the most literal reading (split at a line start);
        # confirm against the format load_FSMs actually produces.
        my( $field, $value ) = split( /^/, $field_value, 2 );
        $value = '' unless defined $value;

        # substitute value into the corresponding FORM field.
        # The configured values are templates such as $1value="SEA"$2.
        # A plain s/$field/$value/ would insert "$1" and "$2" literally,
        # so the captures are expanded by hand.
        $doc =~ s{$field}{
            my @group = ( undef, $1, $2, $3, $4, $5, $6, $7, $8, $9 );
            ( my $filled = $value ) =~ s/\$(\d)/defined $group[$1] ? $group[$1] : ''/ge;
            $filled;
        }gse;
    }

    #
    # FSM -- last step
    #

    my $base_href = "<BASE HREF=\"$form_url\">";
    print $base_href, "\n", $doc, "\n";

    return;
}
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# 

# This is the FSM configuration file for the Session Server 
# 

sony00001a FSM_session_id http://www.sonystyle.com/ <a[^>]+?href=([^> 
\t\r\n]*)[^>]*>[^<]*<img[^>]+alt=\"computers and peripherals <a[^>]+?href=([^> 
\t\r\n]*)[^>]*>[ \t\r\n]*VAIO\&reg\; Notebooks< <a[^>]+?href=([^> \t\r\n]*)[^>]*>Z1 
Series< <a[^>]+?href=([^> \t\r\n]*)[^>]*>[^<]*<img[^>]+alt=\"PCGZ1RAP1KITB\" 
sony00001b FSM_session_id http://www.sonystyle.com/ <a[^>]+?href=([^> 
\t\r\n]*)[^>]*>[^<]*<img[^>]+alt=\"computers and peripherals <a[^>]+?href=([^> 
\t\r\n]*)[^>]*>[ \t\r\n]*VAIO\&reg\; Notebooks< <a[^>]+?href=([^> \t\r\n]*)[^>]*>Z1 
Series< <a[^>]+?href=([^> \t\r\n]*)[^>]*>[^<]*<img[^>]+alt=\"PCG-Z1VAP2\" 
sony00002 FSM_session_id http://www.sonystyle.com/ <a[^>]+?href=([^> 
\t\r\n]*)[^>]*>[^<]*<img[^>]+alt=\"computers and peripherals <a[^>]+?href=([^> 
\t\r\n]*)[^>]*>[ \t\r\n]*VAIO\&reg\; Notebooks< <a[^>]+?href=([^> \t\r\n]*)[^>]*>V505 
Series< 

sony00003 FSM_session_id http://www.sonystyle.com/ <a[^>]+?href=([^> 
\t\r\n]*)[^>]*>[^<]*<img[^>]+alt=\"computers and peripherals <a[^>]+?href=([^> 
\t\r\n]*)[^>]*>[ \t\r\n]*VAIO\&reg\; Notebooks< <a[^>]+?href=([^> \t\r\n]*)[^>]*>GRT 
Series< 

sony00004 FSM_session_id http://www.sonystyle.com/ <a[^>]+?href=([^> 
\t\r\n]*)[^>]*>[^<]*<img[^>]+alt=\"computers and peripherals <a[^>]+?href=([^> 
\t\r\n]*)[^>]*>[ \t\r\n]*VAIO\&reg\; Notebooks< <a[^>]+?href=([^> \t\r\n]*)[^>]*>TR 
Series< 

sony00005 FSM_session_id http://www.sonystyle.com/ <a[^>]+?href=([^> 
\t\r\n]*)[^>]*>[^<]*<img[^>]+alt=\"computers and peripherals <a[^>]+?href=([^> 
\t\r\n]*)[^>]*>[ \t\r\n]*VAIO\&reg\; Notebooks< <a[^>]+?href=([^> \t\r\n]*)[^>]*>FRV 
Series< 



30 



30 



sony_opt00001a FSM_session_id_optimized http://www.sonystyle.com/ 
<a[^>]+?href=([^> \t\r\n]*)[^>]*>[^<]*<img[^>]+alt=\"computers and peripherals 
\;sid=([^=\?]+)[=\?] http://www.sonystyle.com/is- 
bin/INTERSHOP.enfinity/eCS/Store/en/-/USD/SY_DisplayProductInformation- 
Start;sid=_SESSION_ID_=?CategoryName=cpu_VAIONotebookComputers_Z1Series&Pr 
oductSKU=PCGZ1RAP1KITB&Dept=cpu _SESSION_ID_ 
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deltaformOOOOla http://www.delta.com/ 

(.*<input[^>]*?name=\"DEPT_1\"[^>]*)value=\"\"(.*) 
(.*<input[^>]*?name=\"DEST_1\"[^>]*)value=\"\"(.*) 

$1value=\"SEA\"$2 
$1value=\"SFO\"$2 
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#!/usr/bin/perl -w 

# 
# 

5 # 

#
# Standalone FSM walk-through for one sonystyle.com product page.
#
# Fetches the start page, then follows four links in turn.  Each step's
# pattern captures the href of the link to follow from the page fetched in
# the previous step.  The script dies at the first pattern that does not
# match; otherwise it prints a <BASE HREF> tag and the final page.
#

my $starturl = "http://www.sonystyle.com/";

# Fetch one URL with wget and return the page text ('' on failure).
# The URL is passed as a separate argument (list-form pipe open), so it
# never reaches a shell: URLs scraped from remote pages are untrusted.
# "-nv" is the short form of the listing's --non-verbose; the long
# spelling differs between wget releases.
my $fetch = sub {
    my( $url ) = @_;
    open( my $wget, '-|', 'wget', '-O', '-',
          '--load-cookies', 'cookies', '--save-cookies', 'cookies',
          '-nv', $url ) or return '';
    local $/;
    my $page = <$wget>;
    close( $wget );
    return defined $page ? $page : '';
};

my $doc = $fetch->( $starturl );

#
# FSM -- steps 1 to 4 (one pattern per step, in order)
#

my @step_regexps = (
    "<a[^>]+?href=([^> \t\r\n]*)[^>]*>[^<]*<img[^>]+alt=\"computers and peripherals",
    "<a[^>]+?href=([^> \t\r\n]*)[^>]*>[ \t\r\n]*VAIO\&reg\; Notebooks<",
    "<a[^>]+?href=([^> \t\r\n]*)[^>]*>Z1 Series<",
    "<a[^>]+?href=([^> \t\r\n]*)[^>]*>[^<]*<img[^>]+alt=\"PCGZ1RAP1KITB\"",
);

my $step = 1;
foreach my $regexp ( @step_regexps ) {

    if( $doc =~ /$regexp/si ){
        my $nexturl = $1;

        # strip the quotes that may surround the href value
        $nexturl =~ s/^[\"\']*//;
        $nexturl =~ s/[\"\']*$//;

        $doc = $fetch->( $nexturl );
    }else{
        die "Nexturl at FSM Step $step -- cannot be obtained... \n";
    }

    $step ++;
}

#
# FSM -- step 5
#

my $base_href = "<BASE HREF=\"$starturl\">";
print $base_href, "\n", $doc, "\n";   # return page to client

# NOTE(review): the listing ends with a bare die even after a successful
# run; kept as written -- confirm whether a normal exit was intended.
die;



